#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 20_03_03_data_preprocessing.Rmd) and clustering (pipeline in 20_03_03_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Relation to external clinical traits


Quantifying module-trait associations


Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.

datTraits = datMeta %>% dplyr::select(Diagnosis, Brain_lobe, Sex, Age, PMI, RNAExtractionBatch) %>%
            dplyr::rename('ExtractionBatch' = RNAExtractionBatch)

# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)

# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
  print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated')) 
}
## [1] "1 correlation(s) could not be calculated"
rm(ME_object)

Note: The correlations between Modules and Diagonsis that cannot be calculated, weirdly enough, is because the initial correlation is too high, so it would be a very bad thing to lose these modules because of this numerical error. I’m going to fill in the values using the polyserial function, which doesn’t give exactly the same results as the hetcor() function, but it’s quite similar.

# Calculate the correlation tha failed with hetcor()
missing_modules = rownames(moduleTraitCor)[is.na(moduleTraitCor[,1])]

for(m in missing_modules){
  cat(paste0('Correcting Module-Diagnosis correlation for Module ', m))
  moduleTraitCor[m,'Diagnosis'] = polyserial(MEs[,m], datTraits$Diagnosis)
}
## Correcting Module-Diagnosis correlation for Module ME#D39200
## Warning in polyserial(MEs[, m], datTraits$Diagnosis): initial correlation
## inadmissible, -1.02954073623291, set to -0.9999
rm(missing_modules)

I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them

# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)


labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels =  gsub('ME','',rownames(moduleTraitCor)), 
               yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
               textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
               main = paste('Module-Trait relationships'))

diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
                           'MTcor' = moduleTraitCor[,'Diagnosis'],
                           'MTpval' = moduleTraitPvalue[,'Diagnosis'])

genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')

rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)


Studying the modules with the highest absolute correlation to Diagnosis


top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #D39200

There’s only one module with a correlation higher than 0.9, so I’m going to include the largest positive correlation module, which has a correlation of 0.88

top_modules = gsub('ME','',rownames(moduleTraitCor)[moduleTraitCor[,'Diagnosis']>0.88 |
                                                    moduleTraitCor[,'Diagnosis']< -0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #00BC5A, #D39200

The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

The module with the positive correlation to Diagnosis is very big

pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')'))

table(plot_data$ImportantModules)
## 
## #00BC5A #D39200  Others 
##    2278     367   13654
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() + 
           ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)




Module Membership vs Gene Significance


create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
  
  p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
               ggtitle(paste0('Module ', module,'  (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
rm(create_plot)




SFARI Genes


List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance

table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id, 
                    'SFARI score'=gene.score, 'Gene Significance'=GS)

kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
Top SFARI Genes for Module #00BC5A
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000049618 ARID1B 0.3366021 1
ENSG00000108510 MED13 0.8290707 2
ENSG00000114166 KAT2B 0.8283772 2
ENSG00000147050 KDM6A 0.7012818 2
ENSG00000038382 TRIO 0.6534757 2
ENSG00000257923 CUX1 0.6211401 2
ENSG00000112851 ERBB2IP 0.6039454 2
ENSG00000123066 MED13L 0.5682006 2
ENSG00000117139 KDM5B 0.5254979 2
ENSG00000100354 TNRC6B 0.5219282 2
ENSG00000165186 PTCHD1 0.2358762 2
ENSG00000215301 DDX3X 0.1783716 2
ENSG00000181722 ZBTB20 0.9181273 3
ENSG00000141646 SMAD4 0.8727621 3
ENSG00000181090 EHMT1 0.8445343 3
ENSG00000116117 PARD3B 0.8355293 3
ENSG00000168769 TET2 0.8186246 3
ENSG00000112655 PTK7 0.7449802 3
ENSG00000083168 KAT6A 0.7156954 3
ENSG00000259207 ITGB3 0.6981423 3
ENSG00000079482 OPHN1 0.6888942 3
ENSG00000146247 PHIP 0.6580814 3
ENSG00000197724 PHF2 0.6446488 3
ENSG00000008083 JARID2 0.6262088 3
ENSG00000117362 APH1A 0.5953940 3
ENSG00000124126 PREX1 0.4966418 3
ENSG00000148737 TCF7L2 0.4528527 3
ENSG00000132510 KDM6B 0.4443304 3
ENSG00000196628 TCF4 0.4160231 3
ENSG00000206190 ATP10A 0.4044905 3
ENSG00000113742 CPEB4 0.3986469 3
ENSG00000050344 NFE2L3 0.3515540 3
ENSG00000180914 OXTR 0.3269342 3
ENSG00000196092 PAX5 0.2918344 3
ENSG00000165995 CACNB2 0.2850210 3
ENSG00000204406 MBD5 0.2243562 3
ENSG00000166148 AVPR1A 0.1968984 3
ENSG00000140557 ST8SIA2 -0.0619562 3
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
Top SFARI Genes for Module #D39200
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000186487 MYT1L -0.1986426 1
ENSG00000144285 SCN1A -0.8655277 3
ENSG00000197535 MYO5A -0.8028405 3
ENSG00000104388 RAB2A -0.7831104 3
ENSG00000005955 GGNBP2 -0.7580022 3
ENSG00000170745 KCNS3 -0.5624743 3
ENSG00000138411 HECW2 -0.2744052 3
ENSG00000168116 KIAA1586 -0.2680971 3

Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case

plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>% 
            group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
            mutate(p=round(p/n*100,2)) 

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(this_row$hasSFARIscore==FALSE & this_row$p==100){
    new_row = this_row
    new_row$hasSFARIscore = TRUE
    new_row$p = 0
    plot_data = plot_data %>% rbind(new_row)
  }
}

plot_data = plot_data %>% filter(hasSFARIscore==TRUE)

ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
         xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
         theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)

Breaking the SFARI genes by score

scores = c(1,2,3,4,5,6,'None')

plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>% 
            mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(sum(plot_data$Module == this_row$Module)<7){
    missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
    for(s in missing_scores){
      new_row = this_row
      new_row$gene.score = as.character(s)
      new_row$n = 0
      new_row$p = 0
      plot_data = plot_data %>% rbind(new_row) 
    }
  }
}

plot_data = plot_data %>% filter(gene.score != 'None')

plot_function = function(i){
  i = 2*i-1
  plot_list = list()
  for(j in 1:2){
    plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) + 
                geom_smooth(color='gray', se=FALSE) +
                geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
                geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
                xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
                theme_minimal() + theme(legend.position = 'none'))
  }
  p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
    list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
    list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
  
  return(p)
}

plot_function(1)
plot_function(2)
plot_function(3)
rm(i, s, this_row, new_row, plot_function)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.

In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
                    ggtitle(paste0('Module ', module, '  (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
  return(p)
}

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])

grid.arrange(p1, p2, nrow=1)

rm(plot_EGs, p1, p2)




Identifying important genes


Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance

*Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t many SFARI genes in the top genes of each module, and all of the have a score of 5

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
Top 10 genes for module #00BC5A (MTcor = 0.88)
ID external_gene_id MM GS gene.score importance
ENSG00000143384 MCL1 0.9427142 0.8959477 None 0.9193309
ENSG00000150457 LATS2 0.9151257 0.9094471 None 0.9122864
ENSG00000147065 MSN 0.9106850 0.9114844 5 0.9110847
ENSG00000150907 FOXO1 0.8681347 0.9245993 None 0.8963670
ENSG00000116044 NFE2L2 0.9017548 0.8708091 None 0.8862819
ENSG00000148841 ITPRIP 0.8551664 0.9156699 None 0.8854182
ENSG00000151491 EPS8 0.8581006 0.9116522 5 0.8848764
ENSG00000003402 CFLAR 0.8686402 0.8943214 None 0.8814808
ENSG00000089159 PXN 0.8589460 0.9022448 None 0.8805954
ENSG00000183864 TOB2 0.8394053 0.9181283 None 0.8787668
ENSG00000120063 GNA13 0.8268945 0.9302263 None 0.8785604
ENSG00000120690 ELF1 0.8924432 0.8619871 None 0.8772152
ENSG00000158615 PPP1R15B 0.8619953 0.8878566 None 0.8749259
ENSG00000173530 TNFRSF10D 0.8455473 0.8979312 None 0.8717392
ENSG00000084093 REST 0.8197831 0.9207914 None 0.8702873
ENSG00000163629 PTPN13 0.8638131 0.8668572 None 0.8653351
ENSG00000102699 PARP4 0.8332139 0.8920497 None 0.8626318
ENSG00000161638 ITGA5 0.8253777 0.8967534 None 0.8610655
ENSG00000101871 MID1 0.7762770 0.9457893 None 0.8610332
ENSG00000137693 YAP1 0.8661341 0.8529069 None 0.8595205
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
Top 10 genes for module #D39200 (MTcor = -1)
ID external_gene_id MM GS gene.score importance
ENSG00000165169 DYNLT3 0.9076883 -0.9200616 None 0.9138749
ENSG00000123091 RNF11 0.9275239 -0.8986339 None 0.9130789
ENSG00000197860 SGTB 0.8767247 -0.9302899 None 0.9035073
ENSG00000204116 CHIC1 0.9068275 -0.8992826 None 0.9030551
ENSG00000169139 UBE2V2 0.9091857 -0.8846635 None 0.8969246
ENSG00000184203 PPP1R2 0.8535499 -0.9071902 None 0.8803701
ENSG00000213585 VDAC1 0.8737078 -0.8863331 None 0.8800204
ENSG00000175582 RAB6A 0.8306959 -0.9243819 None 0.8775389
ENSG00000163577 EIF5A2 0.8412486 -0.9069291 None 0.8740888
ENSG00000040341 STAU2 0.8918535 -0.8541028 None 0.8729781
ENSG00000197885 NKIRAS1 0.8607468 -0.8810389 None 0.8708928
ENSG00000175395 ZNF25 0.8274191 -0.9043655 None 0.8658923
ENSG00000124785 NRN1 0.8288069 -0.9013313 None 0.8650691
ENSG00000075303 SLC25A40 0.8386439 -0.8856682 None 0.8621560
ENSG00000108946 PRKAR1A 0.8467692 -0.8765254 None 0.8616473
ENSG00000213424 KRT222 0.8437953 -0.8731470 None 0.8584711
ENSG00000197170 PSMD12 0.7878272 -0.9257621 None 0.8567946
ENSG00000150768 DLAT 0.7872120 -0.9244614 None 0.8558367
ENSG00000188730 VWC2 0.8284389 -0.8662499 None 0.8473444
ENSG00000085788 DDHD2 0.8011298 -0.8926130 None 0.8468714
rm(create_table)
pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & 
                                  ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              theme_minimal() + ggtitle('Important genes identified through WGCNA')

Level of expression by Diagnosis for top genes, ordered by importance (defined above)

create_plot = function(i){
  
  plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>% 
              melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
              left_join(top_genes[[i]], by='ID') %>%
              left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
                        by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
  
  p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id, 
                                    levels=unique(plot_data$external_gene_id), ordered=T)) %>%
               ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
                      xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
                      theme(axis.text.x = element_text(angle = 90, hjust = 1)))
  return(p)
  
}

create_plot(1)
create_plot(2)
rm(create_plot)




Enrichment Analysis


Using the package anRichment

# Prepare dataset

# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
                        module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
             filter(genes_info$Module!='gray')

# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Cache found
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')

for(tm in top_modules){
  cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
             tm, ' don\'t have an Entrez Gene ID')) 
}
## 
## 36 genes from top module #00BC5A don't have an Entrez Gene ID
## 6 genes from top module #D39200 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf

collectGarbage()

# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
## 
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)

# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
##  [1] "GO"                                         
##  [2] "GO.BP"                                      
##  [3] "GO.MF"                                      
##  [4] "GO.CC"                                      
##  [5] "JA Miller at AIBS"                          
##  [6] "Chip-X enrichment analysis (ChEA)"          
##  [7] "Brain"                                      
##  [8] "JAM"                                        
##  [9] "Prenatal brain"                             
## [10] "Brain region markers"                       
## [11] "Cortex"                                     
## [12] "Brain region marker enriched gene sets"     
## [13] "WGCNA"                                      
## [14] "BrainRegionMarkers"                         
## [15] "BrainRegionMarkers.HBA"                     
## [16] "BrainRegionMarkers.HBA.localMarker(top200)" 
## [17] "Postnatal brain"                            
## [18] "ImmunePathways"                             
## [19] "Markers of cortex layers"                   
## [20] "BrainLists"                                 
## [21] "Cell type markers"                          
## [22] "Germinal brain"                             
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"                      
## [25] "Postmitotic brain"                          
## [26] "BrainLists.Blalock_AD"                      
## [27] "BrainLists.DiseaseGenes"                    
## [28] "BloodAtlases"                               
## [29] "Verge Disease Genes"                        
## [30] "BloodAtlases.Whitney"                       
## [31] "BrainLists.JAXdiseaseGene"                  
## [32] "BrainLists.MO"                              
## [33] "Age-associated genes"                       
## [34] "BrainLists.Lu_Aging"                        
## [35] "Cell type marker enriched gene sets"        
## [36] "BrainLists.CA1vsCA3"                        
## [37] "BrainLists.MitochondrialType"               
## [38] "BrainLists.MO.2+_26Mar08"                   
## [39] "BrainLists.MO.Sugino"                       
## [40] "BloodAtlases.Gnatenko2"                     
## [41] "BloodAtlases.Kabanova"                      
## [42] "BrainLists.Voineagu"                        
## [43] "StemCellLists"                              
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
                                refCollection = combined_col, #useBackground = 'given', 
                                threshold = 1e-4, thresholdType = 'Bonferroni',
                                getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
##  enrichmentAnalysis: preparing data..
##   ..working on label set 1 ..


Results


kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
Enriched terms for module #00BC5A (MTcor = 0.8803)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000143 Lowest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 1.912662 2234 1450 386
JAMiller.AIBS.000009 VZ markers at 15-16 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Germinal brain 0.00e+00 0e+00 1.931293 2234 1250 336
JAM:003025 Red Nucleus_IN_Mesencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.550663 2234 172 85
GO:0002376 immune system process GO|GO.BP 0.00e+00 0e+00 1.533411 2234 2399 512
GO:0007166 cell surface receptor signaling pathway GO|GO.BP 0.00e+00 0e+00 1.511710 2234 2538 534
GO:0007165 signal transduction GO|GO.BP 0.00e+00 0e+00 1.322756 2234 4894 901
JAM:002861 Globus Pallidus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.524653 2234 159 78
JAMiller.AIBS.000148 Highest in VZ of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 0.00e+00 0e+00 2.069497 2234 677 195
GO:0007154 cell communication GO|GO.BP 0.00e+00 0e+00 1.288971 2234 5340 958
GO:0023052 signaling GO|GO.BP 0.00e+00 0e+00 1.289038 2234 5323 955
GO:0050896 response to stimulus GO|GO.BP 0.00e+00 0e+00 1.213397 2234 7372 1245
JAMiller.AIBS.000098 Cortical oligodendrocytes (Olig2) JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 0.00e+00 0e+00 1.500797 2234 2274 475
JAMiller.AIBS.000204 RegionalWGCNA midfetal M34 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.00e+00 0e+00 2.321003 2234 421 136
GO:0048513 animal organ development GO|GO.BP 0.00e+00 0e+00 1.419661 2234 2905 574
GO:0070887 cellular response to chemical stimulus GO|GO.BP 0.00e+00 0e+00 1.426692 2234 2795 555
GO:0005886 plasma membrane GO|GO.CC 0.00e+00 0e+00 1.313559 2234 4343 794
GO:0051716 cellular response to stimulus GO|GO.BP 0.00e+00 0e+00 1.238449 2234 6167 1063
GO:0071944 cell periphery GO|GO.CC 0.00e+00 0e+00 1.307368 2234 4446 809
GO:0009605 response to external stimulus GO|GO.BP 0.00e+00 0e+00 1.519661 2234 1929 408
GO:0032502 developmental process GO|GO.BP 0.00e+00 0e+00 1.262170 2234 5294 930
JAM:003019 Putamen_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.143381 2234 160 70
GO:0048583 regulation of response to stimulus GO|GO.BP 0.00e+00 0e+00 1.342098 2234 3635 679
JAMiller.AIBS.000097 Cortical astrocytes JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 0.00e+00 0e+00 1.457574 2234 2302 467
GO:0022610 biological adhesion GO|GO.BP 0.00e+00 0e+00 1.656290 2234 1258 290
GO:0071310 cellular response to organic substance GO|GO.BP 0.00e+00 0e+00 1.454967 2234 2316 469
GO:0048856 anatomical structure development GO|GO.BP 0.00e+00 0e+00 1.271492 2234 4967 879
JAM:003092 upAging_oligo JAM|BrainLists|BrainLists.Lu_Aging 0.00e+00 0e+00 2.076236 2234 526 152
GO:0006955 immune response GO|GO.BP 0.00e+00 0e+00 1.566221 2234 1601 349
GO:0007155 cell adhesion GO|GO.BP 0.00e+00 0e+00 1.651431 2234 1253 288
GO:0048518 positive regulation of biological process GO|GO.BP 0.00e+00 0e+00 1.257044 2234 5247 918
GO:0042221 response to chemical GO|GO.BP 0.00e+00 0e+00 1.325881 2234 3777 697
GO:0009888 tissue development GO|GO.BP 0.00e+00 0e+00 1.552644 2234 1615 349
GO:0030154 cell differentiation GO|GO.BP 0.00e+00 0e+00 1.342091 2234 3453 645
GO:0048869 cellular developmental process GO|GO.BP 0.00e+00 0e+00 1.330092 2234 3630 672
GO:0010033 response to organic substance GO|GO.BP 0.00e+00 0e+00 1.389635 2234 2823 546
GO:0001775 cell activation GO|GO.BP 0.00e+00 0e+00 1.662767 2234 1171 271
JAMiller.AIBS.000136 Layer6 enriched in adult macaque cortex JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 2.715020 2234 217 82
GO:0048731 system development GO|GO.BP 0.00e+00 0e+00 1.300740 2234 4082 739
GO:0032501 multicellular organismal process GO|GO.BP 0.00e+00 0e+00 1.221238 2234 6048 1028
JAMiller.AIBS.000434 Genes bound by RELA in HUMAN FIBROSARCOMA from PMID 24523406 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.690558 2234 1054 248
GO:0009966 regulation of signal transduction GO|GO.BP 0.00e+00 0e+00 1.379703 2234 2760 530
GO:0008283 cell proliferation GO|GO.BP 0.00e+00 0e+00 1.516515 2234 1644 347
GO:0048522 positive regulation of cellular process GO|GO.BP 0.00e+00 0e+00 1.262935 2234 4665 820
GO:0035295 tube development GO|GO.BP 0.00e+00 0e+00 1.728209 2234 898 216
GO:0048585 negative regulation of response to stimulus GO|GO.BP 0.00e+00 0e+00 1.549872 2234 1451 313
JAMiller.AIBS.000084 Bergman glia JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 1.605754 2234 1226 274
JAMiller.AIBS.000062 CortexWGCNA 15-21 post-conception weeks C36 SZ/VZenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.00e+00 0e+00 2.928864 2234 157 64
GO:0009653 anatomical structure morphogenesis GO|GO.BP 0.00e+00 0e+00 1.409650 2234 2314 454
GO:0051239 regulation of multicellular organismal process GO|GO.BP 0.00e+00 0e+00 1.365073 2234 2758 524
GO:0009986 cell surface GO|GO.CC 0.00e+00 0e+00 1.830406 2234 683 174
JAMiller.AIBS.000085 Cerebellar astrocytes JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 1.539260 2234 1447 310
GO:0045321 leukocyte activation GO|GO.BP 0.00e+00 0e+00 1.646389 2234 1043 239
GO:0042981 regulation of apoptotic process GO|GO.BP 0.00e+00 0e+00 1.561692 2234 1325 288
GO:0042127 regulation of cell proliferation GO|GO.BP 0.00e+00 0e+00 1.551637 2234 1366 295
GO:0007275 multicellular organism development GO|GO.BP 0.00e+00 0e+00 1.256250 2234 4564 798
GO:0006954 inflammatory response GO|GO.BP 0.00e+00 0e+00 1.887551 2234 590 155
GO:0010941 regulation of cell death GO|GO.BP 0.00e+00 0e+00 1.530065 2234 1451 309
GO:0002252 immune effector process GO|GO.BP 0.00e+00 0e+00 1.663097 2234 985 228
GO:0023051 regulation of signaling GO|GO.BP 0.00e+00 0e+00 1.328506 2234 3153 583
GO:0016477 cell migration GO|GO.BP 0.00e+00 0e+00 1.574213 2234 1246 273
JAMiller.AIBS.000099 Cortical oligodendrocytes (Cmtm5) JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 0.00e+00 0e+00 1.669136 2234 947 220
GO:0043067 regulation of programmed cell death GO|GO.BP 0.00e+00 0e+00 1.547263 2234 1342 289
GO:0050793 regulation of developmental process GO|GO.BP 0.00e+00 0e+00 1.398545 2234 2281 444
JAMiller.AIBS.000537 Genes bound by TP63 in HUMAN EP156T from PMID 23658742 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.348757 2234 2818 529
JAM:002965 nucleus subceruleus_IN_Pontine Tegmentum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2.893236 2234 149 60
GO:0010646 regulation of cell communication GO|GO.BP 0.00e+00 0e+00 1.325477 2234 3106 573
GO:0098609 cell-cell adhesion GO|GO.BP 0.00e+00 0e+00 1.764534 2234 737 181
GO:0034097 response to cytokine GO|GO.BP 0.00e+00 0e+00 1.639405 2234 1008 230
GO:0040011 locomotion GO|GO.BP 0.00e+00 0e+00 1.491716 2234 1575 327
JAMiller.AIBS.000154 Highest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 0.00e+00 0e+00 1.462738 2234 1729 352
GO:0048870 cell motility GO|GO.BP 0.00e+00 0e+00 1.523905 2234 1372 291
GO:0051674 localization of cell GO|GO.BP 0.00e+00 0e+00 1.523905 2234 1372 291
GO:0008219 cell death GO|GO.BP 0.00e+00 0e+00 1.433122 2234 1865 372
JAMiller.AIBS.000193 CortexWGCNA midfetal M23 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.00e+00 0e+00 1.619628 2234 1007 227
GO:0071345 cellular response to cytokine stimulus GO|GO.BP 0.00e+00 0e+00 1.647338 2234 929 213
JAMiller.AIBS.000472 Genes bound by SOX2 in HUMAN SW620 from PMID 20726797 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.424943 2234 1911 379
GO:0009617 response to bacterium GO|GO.BP 0.00e+00 0e+00 1.989403 2234 437 121
JAMiller.AIBS.000460 Genes bound by SMAD2 in human HaCaT from PMID 18955504 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.491200 2234 1484 308
JAMiller.AIBS.000461 Genes bound by SMAD3 in human HaCaT from PMID 18955504 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.491200 2234 1484 308
GO:0035556 intracellular signal transduction GO|GO.BP 0.00e+00 0e+00 1.352578 2234 2571 484
JAM:003110 White Matter JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2.745810 2234 157 60
GO:0006952 defense response GO|GO.BP 0.00e+00 0e+00 1.535958 2234 1263 270
GO:0050900 leukocyte migration GO|GO.BP 0.00e+00 0e+00 2.102391 2234 352 103
GO:0035239 tube morphogenesis GO|GO.BP 0.00e+00 0e+00 1.729873 2234 731 176
GO:0044421 extracellular region part GO|GO.CC 0.00e+00 0e+00 1.330628 2234 2743 508
JAMiller.AIBS.000304 Genes bound by FOXA2 in human HepG2 from PMID 19822575 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.345228 2234 2553 478
GO:0016021 integral component of membrane GO|GO.CC 0.00e+00 0e+00 1.247971 2234 4174 725
GO:0023057 negative regulation of signaling GO|GO.BP 0.00e+00 0e+00 1.529192 2234 1231 262
GO:0005615 extracellular space GO|GO.CC 0.00e+00 0e+00 1.338796 2234 2576 480
GO:0005576 extracellular region GO|GO.CC 0.00e+00 0e+00 1.290380 2234 3274 588
JAMiller.AIBS.000289 Genes bound by ESR1 in MOUSE UTERI from PMID 22446102 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.544062 2234 1154 248
GO:0002443 leukocyte mediated immunity GO|GO.BP 0.00e+00 0e+00 1.739495 2234 665 161
GO:0010648 negative regulation of cell communication GO|GO.BP 0.00e+00 0e+00 1.521227 2234 1228 260
GO:0072358 cardiovascular system development GO|GO.BP 0.00e+00 0e+00 1.759328 2234 633 155
GO:0060429 epithelium development GO|GO.BP 0.00e+00 0e+00 1.597463 2234 967 215
GO:0072359 circulatory system development GO|GO.BP 0.00e+00 0e+00 1.601674 2234 951 212
GO:0001944 vasculature development GO|GO.BP 0.00e+00 0e+00 1.761675 2234 624 153
GO:0002274 myeloid leukocyte activation GO|GO.BP 0.00e+00 0e+00 1.796218 2234 576 144
GO:0009968 negative regulation of signal transduction GO|GO.BP 0.00e+00 0e+00 1.542335 2234 1132 243
GO:0002366 leukocyte activation involved in immune response GO|GO.BP 0.00e+00 0e+00 1.769672 2234 609 150
GO:0006950 response to stress GO|GO.BP 0.00e+00 0e+00 1.279060 2234 3376 601
GO:0044459 plasma membrane part GO|GO.CC 0.00e+00 0e+00 1.341137 2234 2459 459
GO:0012501 programmed cell death GO|GO.BP 0.00e+00 0e+00 1.418878 2234 1747 345
GO:0002684 positive regulation of immune system process GO|GO.BP 0.00e+00 0e+00 1.618214 2234 888 200
GO:0031224 intrinsic component of membrane GO|GO.CC 0.00e+00 0e+00 1.235370 2234 4298 739
GO:0051240 positive regulation of multicellular organismal process GO|GO.BP 1.00e-07 0e+00 1.454081 2234 1512 306
GO:0044425 membrane part GO|GO.CC 1.00e-07 0e+00 1.195299 2234 5494 914
GO:0002263 cell activation involved in immune response GO|GO.BP 1.00e-07 0e+00 1.758125 2234 613 150
GO:0006915 apoptotic process GO|GO.BP 1.00e-07 0e+00 1.425028 2234 1684 334
GO:0048584 positive regulation of response to stimulus GO|GO.BP 1.00e-07 0e+00 1.378843 2234 2027 389
GO:0048646 anatomical structure formation involved in morphogenesis GO|GO.BP 1.00e-07 0e+00 1.594071 2234 933 207
GO:0002682 regulation of immune system process GO|GO.BP 1.00e-07 0e+00 1.494050 2234 1284 267
GO:1902531 regulation of intracellular signal transduction GO|GO.BP 1.00e-07 0e+00 1.420809 2234 1689 334
GO:0043066 negative regulation of apoptotic process GO|GO.BP 1.00e-07 0e+00 1.669115 2234 749 174
GO:0009897 external side of plasma membrane GO|GO.CC 1.00e-07 0e+00 2.248923 2234 246 77
GO:0009887 animal organ morphogenesis GO|GO.BP 1.00e-07 0e+00 1.603766 2234 896 200
JAMiller.AIBS.000163 Genes decreasing in fetal and increasing in aging JA Miller at AIBS|Brain|Age-associated genes|Cortex 1.00e-07 0e+00 3.647703 2234 65 33
JAM:003106 Ventral Thalamus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.00e-07 0e+00 2.784833 2234 129 50
GO:0048523 negative regulation of cellular process GO|GO.BP 2.00e-07 0e+00 1.234359 2234 4156 714
GO:0048514 blood vessel morphogenesis GO|GO.BP 2.00e-07 0e+00 1.810035 2234 520 131
JAM:003076 noChangeAD_oligo_plasmaMembrane JAM|BrainLists|BrainLists.Blalock_AD 2.00e-07 0e+00 3.444801 2234 73 35
GO:0001568 blood vessel development GO|GO.BP 2.00e-07 0e+00 1.747997 2234 596 145
GO:0030855 epithelial cell differentiation GO|GO.BP 2.00e-07 0e+00 1.813793 2234 511 129
GO:0051094 positive regulation of developmental process GO|GO.BP 3.00e-07 0e+00 1.500600 2234 1197 250
GO:0043069 negative regulation of programmed cell death GO|GO.BP 3.00e-07 0e+00 1.645749 2234 764 175
JAMiller.AIBS.000280 Genes bound by EP300 in MOUSE FORBRAIN MIDBRAIN LIMB HEART from PMID 20729851 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 4.00e-07 0e+00 1.484949 2234 1258 260
GO:0051241 negative regulation of multicellular organismal process GO|GO.BP 4.00e-07 0e+00 1.535229 2234 1053 225
GO:0060548 negative regulation of cell death GO|GO.BP 5.00e-07 0e+00 1.604223 2234 842 188
JAM:002984 Paravermis_IN_Cerebellar Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 6.00e-07 0e+00 2.758990 2234 125 48
JAMiller.AIBS.000063 CortexWGCNA 15-21 post-conception weeks C37 SZ/VZenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 6.00e-07 0e+00 2.900315 2234 109 44
JAMiller.AIBS.000365 Genes bound by MYB in MOUSE ERMYB from PMID 21317192 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 7.00e-07 0e+00 1.630807 2234 771 175
GO:0051093 negative regulation of developmental process GO|GO.BP 8.00e-07 0e+00 1.603379 2234 829 185
GO:0009607 response to biotic stimulus GO|GO.BP 8.00e-07 0e+00 1.651923 2234 722 166
JAMiller.AIBS.000112 HippocampusWGCNA greenyellow SGZenriched astrocyte JA Miller at AIBS|Brain|Postnatal brain|WGCNA 9.00e-07 0e+00 2.190509 2234 246 75
GO:0031325 positive regulation of cellular metabolic process GO|GO.BP 1.00e-06 0e+00 1.289463 2234 2864 514
JAMiller.AIBS.000463 Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.00e-06 0e+00 1.350154 2234 2102 395
GO:0001816 cytokine production GO|GO.BP 1.10e-06 0e+00 1.692536 2234 641 151
JAMiller.AIBS.000110 HippocampusWGCNA cyan SGZenriched upAge glia/gliogenesis JA Miller at AIBS|Brain|Postnatal brain|WGCNA 1.40e-06 0e+00 2.535836 2234 153 54
GO:0008284 positive regulation of cell proliferation GO|GO.BP 1.50e-06 0e+00 1.627302 2234 755 171
JAMiller.AIBS.000465 Genes bound by SMAD4 in HUMAN HESC from PMID 21741376 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.60e-06 0e+00 1.380936 2234 1795 345
GO:0009611 response to wounding GO|GO.BP 1.70e-06 0e+00 1.730388 2234 573 138
GO:0050794 regulation of cellular process GO|GO.BP 1.90e-06 0e+00 1.114039 2234 8984 1393
GO:0042060 wound healing GO|GO.BP 2.20e-06 0e+00 1.807610 2234 473 119
GO:0002237 response to molecule of bacterial origin GO|GO.BP 2.50e-06 0e+00 2.078480 2234 280 81
GO:0009893 positive regulation of metabolic process GO|GO.BP 2.70e-06 0e+00 1.268326 2234 3110 549
GO:0042119 neutrophil activation GO|GO.BP 2.80e-06 0e+00 1.832710 2234 443 113
GO:0036230 granulocyte activation GO|GO.BP 3.10e-06 0e+00 1.824221 2234 449 114
GO:0006928 movement of cell or subcellular component GO|GO.BP 3.60e-06 0e+00 1.373109 2234 1800 344
GO:0065007 biological regulation GO|GO.BP 3.60e-06 0e+00 1.096105 2234 10147 1548
GO:0002520 immune system development GO|GO.BP 4.00e-06 0e+00 1.577585 2234 838 184
GO:0006959 humoral immune response GO|GO.BP 4.10e-06 0e+00 2.632624 2234 131 48
GO:0030155 regulation of cell adhesion GO|GO.BP 4.20e-06 0e+00 1.694770 2234 602 142
JAMiller.AIBS.000491 Genes bound by STAT3 in MOUSE CD4+T from PMID 20064451 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 4.20e-06 0e+00 1.597652 2234 787 175
GO:0048534 hematopoietic or lymphoid organ development GO|GO.BP 4.90e-06 0e+00 1.592616 2234 794 176
GO:0001817 regulation of cytokine production GO|GO.BP 5.30e-06 0e+00 1.703629 2234 582 138
GO:0033993 response to lipid GO|GO.BP 5.80e-06 0e+00 1.591585 2234 790 175
JAMiller.AIBS.000176 CortexWGCNA midfetal M6 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 6.20e-06 0e+00 1.548739 2234 900 194
GO:0051707 response to other organism GO|GO.BP 6.40e-06 0e+00 1.637191 2234 689 157
GO:0009719 response to endogenous stimulus GO|GO.BP 6.70e-06 0e+00 1.418066 2234 1444 285
GO:1901652 response to peptide GO|GO.BP 7.50e-06 0e+00 1.800165 2234 455 114
GO:0043207 response to external biotic stimulus GO|GO.BP 8.10e-06 0e+00 1.632452 2234 691 157
JAMiller.AIBS.000477 Genes bound by SOX9 in MOUSE HFSC from PMID 24532713 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 8.10e-06 0e+00 1.466053 2234 1186 242
JAMiller.AIBS.000241 Genes bound by CLOCK in HUMAN 293T from PMID 20551151 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.06e-05 1e-07 1.865480 2234 389 101
GO:1901700 response to oxygen-containing compound GO|GO.BP 1.22e-05 1e-07 1.416664 2234 1415 279
GO:0032496 response to lipopolysaccharide GO|GO.BP 1.34e-05 1e-07 2.056636 2234 269 77
GO:0002444 myeloid leukocyte mediated immunity GO|GO.BP 1.97e-05 1e-07 1.745313 2234 494 120
GO:0032101 regulation of response to external stimulus GO|GO.BP 2.48e-05 1e-07 1.617646 2234 684 154
GO:0030667 secretory granule membrane GO|GO.CC 2.79e-05 2e-07 2.048917 2234 263 75
GO:0071216 cellular response to biotic stimulus GO|GO.BP 3.27e-05 2e-07 2.245272 2234 192 60
GO:0080134 regulation of response to stress GO|GO.BP 3.55e-05 2e-07 1.419700 2234 1331 263
GO:2000026 regulation of multicellular organismal development GO|GO.BP 3.64e-05 2e-07 1.349382 2234 1821 342
GO:0070161 anchoring junction GO|GO.CC 3.72e-05 2e-07 1.691329 2234 548 129
GO:0030097 hemopoiesis GO|GO.BP 3.81e-05 2e-07 1.574637 2234 762 167
GO:0012505 endomembrane system GO|GO.CC 3.94e-05 2e-07 1.212819 2234 3981 672
GO:0043299 leukocyte degranulation GO|GO.BP 4.17e-05 2e-07 1.743609 2234 478 116
GO:0051246 regulation of protein metabolic process GO|GO.BP 5.57e-05 3e-07 1.289970 2234 2434 437
GO:0002446 neutrophil mediated immunity GO|GO.BP 6.01e-05 3e-07 1.767835 2234 443 109
JAM:002841 Emboliform Nucleus_IN_Cerebellar Nucleus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 6.02e-05 3e-07 2.411030 2234 149 50
GO:0046903 secretion GO|GO.BP 6.09e-05 3e-07 1.396779 2234 1430 278
GO:1901653 cellular response to peptide GO|GO.BP 6.17e-05 3e-07 1.891874 2234 338 89
GO:0071495 cellular response to endogenous stimulus GO|GO.BP 6.42e-05 3e-07 1.433449 2234 1223 244
JAMiller.AIBS.000506 Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 6.54e-05 3e-07 1.233382 2234 3402 584
GO:0045595 regulation of cell differentiation GO|GO.BP 6.64e-05 3e-07 1.368547 2234 1617 308
JAMiller.AIBS.000055 CortexWGCNA 15-21 post-conception weeks C29 SGenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 6.78e-05 3e-07 2.056959 2234 248 71
GO:0019221 cytokine-mediated signaling pathway GO|GO.BP 7.41e-05 4e-07 1.620227 2234 643 145
GO:0071219 cellular response to molecule of bacterial origin GO|GO.BP 7.71e-05 4e-07 2.309423 2234 168 54
JAM:002881 Head of Caudate Nucleus_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 7.72e-05 4e-07 2.525931 2234 128 45
GO:0001525 angiogenesis GO|GO.BP 8.32e-05 4e-07 1.763559 2234 440 108
GO:0032940 secretion by cell GO|GO.BP 8.50e-05 4e-07 1.410926 2234 1324 260
GO:0001932 regulation of protein phosphorylation GO|GO.BP 9.39e-05 4e-07 1.424288 2234 1246 247
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
Enriched terms for module #D39200 (MTcor = -0.9999)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000109 HippocampusWGCNA brown JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.0000086 0.0000000 2.818248 349 767 47
JAM:002769 downAD_mitochondrion JAM|BrainLists|BrainLists.Blalock_AD 0.0000105 0.0000001 4.512364 349 265 26
JAMiller.AIBS.000052 CortexWGCNA 15-21 post-conception weeks C26 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.0001548 0.0000007 2.727766 349 725 43
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.0014249 0.0000052 1.526639 349 4067 135
JAMiller.AIBS.000205 RegionalWGCNA midfetal M35 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0442692 0.0001118 5.536002 349 108 13
JAMiller.AIBS.000183 CortexWGCNA midfetal M13 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.0586986 0.0001446 1.958569 349 1315 56
JAMiller.AIBS.000106 Genes enriched in the hippocampal SGZ in mouse JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers 0.3608531 0.0007203 2.993523 349 338 22
JAMiller.AIBS.000570 WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 1.0000000 0.0023508 2.480864 349 482 26
JAMiller.AIBS.000228 Genes bound by BMI1 in MOUSE NPCS from PMID 23680149 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.0000000 0.0026512 2.177498 349 697 33
JAMiller.AIBS.000349 Genes bound by KDM5B in MOUSE MESC from PMID 21448134 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.0000000 0.0073732 1.444917 349 2992 94

Save Enrichment Analysis results

save(enrichment, file='./../Data/enrichmentAnalysis.RData')
#load('./../Data/enrichmentAnalysis.RData')



Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.10.0                      
##  [2] BrainDiseaseCollection_1.00              
##  [3] anRichment_1.01-2                        
##  [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
##  [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2  
##  [6] GenomicFeatures_1.38.2                   
##  [7] GenomicRanges_1.38.0                     
##  [8] GenomeInfoDb_1.22.0                      
##  [9] anRichmentMethods_0.90-1                 
## [10] WGCNA_1.68                               
## [11] fastcluster_1.1.25                       
## [12] dynamicTreeCut_1.63-1                    
## [13] GO.db_3.10.0                             
## [14] AnnotationDbi_1.48.0                     
## [15] IRanges_2.20.2                           
## [16] S4Vectors_0.24.3                         
## [17] Biobase_2.46.0                           
## [18] BiocGenerics_0.32.0                      
## [19] biomaRt_2.42.0                           
## [20] knitr_1.24                               
## [21] doParallel_1.0.15                        
## [22] iterators_1.0.12                         
## [23] foreach_1.4.7                            
## [24] polycor_0.7-10                           
## [25] expss_0.10.1                             
## [26] GGally_1.4.0                             
## [27] gridExtra_2.3                            
## [28] viridis_0.5.1                            
## [29] viridisLite_0.3.0                        
## [30] RColorBrewer_1.1-2                       
## [31] dendextend_1.13.3                        
## [32] plotly_4.9.2                             
## [33] glue_1.3.1                               
## [34] reshape2_1.4.3                           
## [35] forcats_0.4.0                            
## [36] stringr_1.4.0                            
## [37] dplyr_0.8.3                              
## [38] purrr_0.3.3                              
## [39] readr_1.3.1                              
## [40] tidyr_1.0.2                              
## [41] tibble_2.1.3                             
## [42] ggplot2_3.2.1                            
## [43] tidyverse_1.3.0                          
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.2-0                 BiocFileCache_1.10.2       
##   [5] plyr_1.8.5                  lazyeval_0.2.2             
##   [7] splines_3.6.0               crosstalk_1.0.0            
##   [9] BiocParallel_1.20.1         robust_0.4-18.2            
##  [11] digest_0.6.24               htmltools_0.4.0            
##  [13] fansi_0.4.1                 magrittr_1.5               
##  [15] checkmate_1.9.4             memoise_1.1.0              
##  [17] fit.models_0.5-14           cluster_2.0.8              
##  [19] annotate_1.64.0             Biostrings_2.54.0          
##  [21] modelr_0.1.5                matrixStats_0.55.0         
##  [23] askpass_1.1                 prettyunits_1.0.2          
##  [25] colorspace_1.4-1            blob_1.2.1                 
##  [27] rvest_0.3.5                 rappdirs_0.3.1             
##  [29] rrcov_1.4-7                 haven_2.2.0                
##  [31] xfun_0.8                    crayon_1.3.4               
##  [33] RCurl_1.95-4.12             jsonlite_1.6               
##  [35] genefilter_1.68.0           impute_1.60.0              
##  [37] survival_2.44-1.1           gtable_0.3.0               
##  [39] zlibbioc_1.32.0             XVector_0.26.0             
##  [41] DelayedArray_0.12.2         DEoptimR_1.0-8             
##  [43] scales_1.1.0                mvtnorm_1.0-11             
##  [45] DBI_1.1.0                   Rcpp_1.0.3                 
##  [47] xtable_1.8-4                progress_1.2.2             
##  [49] htmlTable_1.13.1            foreign_0.8-71             
##  [51] bit_1.1-15.2                preprocessCore_1.48.0      
##  [53] Formula_1.2-3               htmlwidgets_1.5.1          
##  [55] httr_1.4.1                  ellipsis_0.3.0             
##  [57] acepack_1.4.1               farver_2.0.3               
##  [59] pkgconfig_2.0.3             reshape_0.8.8              
##  [61] XML_3.99-0.3                nnet_7.3-12                
##  [63] dbplyr_1.4.2                locfit_1.5-9.1             
##  [65] later_1.0.0                 labeling_0.3               
##  [67] tidyselect_0.2.5            rlang_0.4.4                
##  [69] munsell_0.5.0               cellranger_1.1.0           
##  [71] tools_3.6.0                 cli_2.0.1                  
##  [73] generics_0.0.2              RSQLite_2.2.0              
##  [75] broom_0.5.4                 fastmap_1.0.1              
##  [77] evaluate_0.14               yaml_2.2.0                 
##  [79] bit64_0.9-7                 fs_1.3.1                   
##  [81] robustbase_0.93-5           nlme_3.1-139               
##  [83] mime_0.9                    xml2_1.2.2                 
##  [85] compiler_3.6.0              rstudioapi_0.10            
##  [87] curl_4.3                    reprex_0.3.0               
##  [89] geneplotter_1.64.0          pcaPP_1.9-73               
##  [91] stringi_1.4.6               highr_0.8                  
##  [93] lattice_0.20-38             Matrix_1.2-17              
##  [95] vctrs_0.2.2                 pillar_1.4.3               
##  [97] lifecycle_0.1.0             data.table_1.12.8          
##  [99] bitops_1.0-6                httpuv_1.5.2               
## [101] rtracklayer_1.46.0          R6_2.4.1                   
## [103] latticeExtra_0.6-28         promises_1.1.0             
## [105] codetools_0.2-16            MASS_7.3-51.4              
## [107] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [109] DESeq2_1.26.0               openssl_1.4.1              
## [111] withr_2.1.2                 GenomicAlignments_1.22.1   
## [113] Rsamtools_2.2.2             GenomeInfoDbData_1.2.2     
## [115] hms_0.5.3                   grid_3.6.0                 
## [117] rpart_4.1-15                rmarkdown_1.14             
## [119] Cairo_1.5-10                shiny_1.4.0                
## [121] lubridate_1.7.4             base64enc_0.1-3